{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "       Unnamed: 0                       _id                        title  \\\n0               0  5dd2198f18bba2b4a9a0b854     大华电梯两房/房型正气/开门南北通/房东诚意出售   \n1               1  5dd2198f18bba2b4a9a0b855  非底楼 满五年唯一 税费少 婚房装修 楼称佳 户型方正   \n2               2  5dd2198f18bba2b4a9a0b856  满五唯一+7号线锦绣路+复式房+带阁楼+小区央位+精装   \n3               3  5dd2198f18bba2b4a9a0b857     13号线陈春路地铁400米中间楼层诚意卖看房方便   \n4               4  5dd2198f18bba2b4a9a0b858    朝阳正气一房，采光好，坐看花园，户型方正，看房方便   \n...           ...                       ...                          ...   \n69649       96117  5ddbc9f0b5aadfc07d6d5d19   满五唯一 精装修 东边套窗户全明  顶楼带阁楼和露台   \n69650       96118  5ddbc9f0b5aadfc07d6d5d1a             美兰湖颐景园 2室2厅 276万   \n69651       96136  5ddbc9f2b5aadfc07d6d5d2c               罗南二村 2室1厅 175万   \n69652       96141  5ddbc9f2b5aadfc07d6d5d31     五楼低总价，两房朝南，满五唯一，格局好，配套齐全   \n69653       96157  5ddbc9f2b5aadfc07d6d5d41      满五唯一，带电梯，精装修近地铁，户型方正楼层好   \n\n       price  up_price        xiaoqu_name   size  buildyear huxing chaoxiang  \\\n0      690.0     76531  大华锦绣华城(十六街区)(公寓)   90.16       2010  2室2厅         南    \n1      330.0     52290               芳雅苑   63.11       1995  2室1厅         南    \n2      500.0     62878               锦博苑   79.52       2007  2室2厅         南    \n3      330.0     45866              鹏海小区   71.95       1997  2室1厅         南    \n4      460.0     83942            万邦都市花园   54.80       2004  1室1厅         南    \n...      ...       ...                ...    ...        ...    ...       ...   \n69649  470.0     51894             美兰湖岭域   90.57       2010  3室1厅     东 南 北    \n69650  276.0     32838            美兰湖颐景园   84.05       2007  2室2厅         南    \n69651  175.0     25061              罗南二村   69.83       1996  2室1厅         南    \n69652  185.0     26493              罗南二村   69.83       1996  2室1厅         南    \n69653  368.0     39983             美兰湖岭域   92.04       2010  2室2厅         南    \n\n      zhuangxiu      cenggao louxing district_name sub_district_name  \n0           简装    中楼层(共18层)       板楼            浦东                北蔡  \n1           精装     低楼层(共6层)       板楼            浦东                北蔡  \n2           精装     高楼层(共6层)       板楼            浦东                北蔡  \n3           简装     中楼层(共6层)       板楼            浦东                北蔡  \n4           简装    中楼层(共11层)       板楼            浦东                北蔡  \n...         ...          ...     ...           ...               ...  \n69649       精装     高楼层(共6层)       板楼            宝山                罗店  \n69650       精装     低楼层(共5层)       板楼            宝山                罗店  \n69651       简装     高楼层(共6层)       板楼            宝山                罗店  \n69652       简装     高楼层(共6层)       板楼            宝山                罗店  \n69653       精装     中楼层(共6层)       板楼            宝山                罗店  \n\n[69654 rows x 15 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Unnamed: 0</th>\n      <th>_id</th>\n      <th>title</th>\n      <th>price</th>\n      <th>up_price</th>\n      <th>xiaoqu_name</th>\n      <th>size</th>\n      <th>buildyear</th>\n      <th>huxing</th>\n      <th>chaoxiang</th>\n      <th>zhuangxiu</th>\n      <th>cenggao</th>\n      <th>louxing</th>\n      <th>district_name</th>\n      <th>sub_district_name</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0</td>\n      <td>5dd2198f18bba2b4a9a0b854</td>\n      <td>大华电梯两房/房型正气/开门南北通/房东诚意出售</td>\n      <td>690.0</td>\n      <td>76531</td>\n      <td>大华锦绣华城(十六街区)(公寓)</td>\n      <td>90.16</td>\n      <td>2010</td>\n      <td>2室2厅</td>\n      <td>南</td>\n      <td>简装</td>\n      <td>中楼层(共18层)</td>\n      <td>板楼</td>\n      <td>浦东</td>\n      <td>北蔡</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>5dd2198f18bba2b4a9a0b855</td>\n      <td>非底楼 满五年唯一 税费少 婚房装修 楼称佳 户型方正</td>\n      <td>330.0</td>\n      <td>52290</td>\n      <td>芳雅苑</td>\n      <td>63.11</td>\n      <td>1995</td>\n      <td>2室1厅</td>\n      <td>南</td>\n      <td>精装</td>\n      <td>低楼层(共6层)</td>\n      <td>板楼</td>\n      <td>浦东</td>\n      <td>北蔡</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2</td>\n      <td>5dd2198f18bba2b4a9a0b856</td>\n      <td>满五唯一+7号线锦绣路+复式房+带阁楼+小区央位+精装</td>\n      <td>500.0</td>\n      <td>62878</td>\n      <td>锦博苑</td>\n      <td>79.52</td>\n      <td>2007</td>\n      <td>2室2厅</td>\n      <td>南</td>\n      <td>精装</td>\n      <td>高楼层(共6层)</td>\n      <td>板楼</td>\n      <td>浦东</td>\n      <td>北蔡</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>3</td>\n      <td>5dd2198f18bba2b4a9a0b857</td>\n      <td>13号线陈春路地铁400米中间楼层诚意卖看房方便</td>\n      <td>330.0</td>\n      <td>45866</td>\n      <td>鹏海小区</td>\n      <td>71.95</td>\n      <td>1997</td>\n      <td>2室1厅</td>\n      <td>南</td>\n      <td>简装</td>\n      <td>中楼层(共6层)</td>\n      <td>板楼</td>\n      <td>浦东</td>\n      <td>北蔡</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>4</td>\n      <td>5dd2198f18bba2b4a9a0b858</td>\n      <td>朝阳正气一房，采光好，坐看花园，户型方正，看房方便</td>\n      <td>460.0</td>\n      <td>83942</td>\n      <td>万邦都市花园</td>\n      <td>54.80</td>\n      <td>2004</td>\n      <td>1室1厅</td>\n      <td>南</td>\n      <td>简装</td>\n      <td>中楼层(共11层)</td>\n      <td>板楼</td>\n      <td>浦东</td>\n      <td>北蔡</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>69649</th>\n      <td>96117</td>\n      <td>5ddbc9f0b5aadfc07d6d5d19</td>\n      <td>满五唯一 精装修 东边套窗户全明  顶楼带阁楼和露台</td>\n      <td>470.0</td>\n      <td>51894</td>\n      <td>美兰湖岭域</td>\n      <td>90.57</td>\n      <td>2010</td>\n      <td>3室1厅</td>\n      <td>东 南 北</td>\n      <td>精装</td>\n      <td>高楼层(共6层)</td>\n      <td>板楼</td>\n      <td>宝山</td>\n      <td>罗店</td>\n    </tr>\n    <tr>\n      <th>69650</th>\n      <td>96118</td>\n      <td>5ddbc9f0b5aadfc07d6d5d1a</td>\n      <td>美兰湖颐景园 2室2厅 276万</td>\n      <td>276.0</td>\n      <td>32838</td>\n      <td>美兰湖颐景园</td>\n      <td>84.05</td>\n      <td>2007</td>\n      <td>2室2厅</td>\n      <td>南</td>\n      <td>精装</td>\n      <td>低楼层(共5层)</td>\n      <td>板楼</td>\n      <td>宝山</td>\n      <td>罗店</td>\n    </tr>\n    <tr>\n      <th>69651</th>\n      <td>96136</td>\n      <td>5ddbc9f2b5aadfc07d6d5d2c</td>\n      <td>罗南二村 2室1厅 175万</td>\n      <td>175.0</td>\n      <td>25061</td>\n      <td>罗南二村</td>\n      <td>69.83</td>\n      <td>1996</td>\n      <td>2室1厅</td>\n      <td>南</td>\n      <td>简装</td>\n      <td>高楼层(共6层)</td>\n      <td>板楼</td>\n      <td>宝山</td>\n      <td>罗店</td>\n    </tr>\n    <tr>\n      <th>69652</th>\n      <td>96141</td>\n      <td>5ddbc9f2b5aadfc07d6d5d31</td>\n      <td>五楼低总价，两房朝南，满五唯一，格局好，配套齐全</td>\n      <td>185.0</td>\n      <td>26493</td>\n      <td>罗南二村</td>\n      <td>69.83</td>\n      <td>1996</td>\n      <td>2室1厅</td>\n      <td>南</td>\n      <td>简装</td>\n      <td>高楼层(共6层)</td>\n      <td>板楼</td>\n      <td>宝山</td>\n      <td>罗店</td>\n    </tr>\n    <tr>\n      <th>69653</th>\n      <td>96157</td>\n      <td>5ddbc9f2b5aadfc07d6d5d41</td>\n      <td>满五唯一，带电梯，精装修近地铁，户型方正楼层好</td>\n      <td>368.0</td>\n      <td>39983</td>\n      <td>美兰湖岭域</td>\n      <td>92.04</td>\n      <td>2010</td>\n      <td>2室2厅</td>\n      <td>南</td>\n      <td>精装</td>\n      <td>中楼层(共6层)</td>\n      <td>板楼</td>\n      <td>宝山</td>\n      <td>罗店</td>\n    </tr>\n  </tbody>\n</table>\n<p>69654 rows × 15 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 101
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "df = pd.read_csv('D:\\资料\\码上行动\\零基础\\数据分析\\houses.csv',encoding='gbk')\n",
    "df = df.drop_duplicates()\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "65596\n8333\n165\n"
    }
   ],
   "source": [
    "#统计出总房源数量、总小区数量、总版块数量\n",
    "print(len(df['title'].value_counts()))\n",
    "print(len(df['xiaoqu_name'].value_counts()))\n",
    "print(len(df['sub_district_name'].value_counts()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "['2室2厅 ' '2室1厅 ' '1室1厅 ' '4室3厅 ' '3室1厅 ' '3室2厅 ' '4室2厅 ' '1室2厅 ' '2室0厅 '\n '5室1厅 ' '1室0厅 ' '5室3厅 ' '5室2厅 ' '4室1厅 ' '4室4厅 ' '6室3厅 ' '5室6厅 ' '6室2厅 '\n '5室4厅 ' '7室3厅 ' '3室3厅 ' '9室2厅 ' '7室2厅 ' '7室1厅 ' '6室4厅 ' '6室1厅 ' '8室4厅 '\n '3室0厅 ' '7室4厅 ' '7室5厅 ' '4室5厅 ' '9室6厅 ' '9室4厅 ' '6室5厅 ' '10室1厅 ' '4室0厅 '\n '8室1厅 ' '9室3厅 ' '9室9厅 ' '5室5厅 ' '8室3厅 ' '2室3厅 ' '7室6厅 ' '8室2厅 ' '0室0厅 '\n '6室6厅 ' '10室6厅 ' '2室4厅 ' '6室0厅 ' '10室2厅 ' '7室0厅 ' '3室4厅 ' '8室5厅 '\n '10室3厅 ' '5室0厅 ' '3室5厅 ']\n[' 简装 ' ' 精装 ' ' 毛坯 ' ' 其他 ']\n[2010 1995 2007 1997 2004 1994 2011 2012 1998 2013 1999 2016 2000 2005\n 1996 2003 2015 2006 2009 1993 2002 2014 1992 2008 2001 2019 2018 1987\n 1991 2017 1989 1988 1982 1983 1984 1981 1985 1977 1986 1990 1979 1980\n 1953 1976 1958 1978 1969 1974 1975 1955 1971 1954 1956 1959 1968 1964\n 1960 1967 1972 1961 1973 1936 1911 1941 1948 1949 1939 1933 1940 1946\n 1937 1910 1945 1957 1965 1966 1970 1963 1950 1962 1930 1920 1947 1938\n 1934 1935 1921 1926 1912 1924 1928 1931 1943 1900 1952 1951]\n"
    }
   ],
   "source": [
    "#统计出有哪些不同的户型、有哪些不同的装修情况、有哪些不同的建造年份\n",
    "print(df['huxing'].unique())\n",
    "print(df['zhuangxiu'].unique())\n",
    "print(df['buildyear'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "55051.4809056192"
     },
     "metadata": {},
     "execution_count": 104
    }
   ],
   "source": [
    "#计算出上海二手房每平方米均价\n",
    "df['up_price'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "               mean  count\nhuxing                    \n2室1厅      66.915066  17364\n2室2厅      93.016552  12990\n3室2厅     129.362787  12618\n1室1厅      46.955651  12103\n4室2厅     208.867312   3613\n3室1厅      91.561336   3451\n1室0厅      36.206541   1914\n2室0厅      49.615223   1369\n1室2厅      67.410990   1192\n5室2厅     286.614175    855\n5室3厅     360.780565    584\n4室3厅     272.177951    566\n6室3厅     395.391215    181\n6室2厅     289.408435    147\n4室1厅     154.547586    145\n3室3厅     175.504030    134\n3室0厅      68.571869    107\n5室4厅     403.056667     51\n6室4厅     516.186122     49\n7室3厅     461.077222     36\n5室1厅     225.869375     32\n7室2厅     310.042667     30\n4室4厅     265.587143     21\n6室1厅     170.230909     11\n7室4厅     535.334444      9\n8室2厅     366.875556      9\n2室3厅     118.597500      8\n6室5厅     669.482857      7\n8室3厅     474.710000      6\n7室5厅     532.396000      5\n8室4厅     622.384000      5\n9室2厅     757.650000      4\n0室0厅     237.622500      4\n9室4厅     409.463333      3\n4室0厅     192.590000      3\n5室6厅     344.720000      3\n6室0厅     504.425000      2\n5室5厅     334.585000      2\n9室6厅     473.775000      2\n7室1厅     177.200000      2\n7室6厅     406.005000      2\n2室4厅     129.400000      1\n9室3厅     593.010000      1\n10室2厅    370.000000      1\n8室5厅    1663.100000      1\n10室3厅    598.320000      1\n10室6厅    473.610000      1\n3室4厅      94.370000      1\n8室1厅     386.170000      1\n3室5厅     376.210000      1\n4室5厅     321.000000      1\n7室0厅     161.440000      1\n6室6厅    1440.160000      1\n5室0厅      89.600000      1\n10室1厅    544.990000      1\n9室9厅    1352.240000      1",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>mean</th>\n      <th>count</th>\n    </tr>\n    <tr>\n      <th>huxing</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2室1厅</th>\n      <td>66.915066</td>\n      <td>17364</td>\n    </tr>\n    <tr>\n      <th>2室2厅</th>\n      <td>93.016552</td>\n      <td>12990</td>\n    </tr>\n    <tr>\n      <th>3室2厅</th>\n      <td>129.362787</td>\n      <td>12618</td>\n    </tr>\n    <tr>\n      <th>1室1厅</th>\n      <td>46.955651</td>\n      <td>12103</td>\n    </tr>\n    <tr>\n      <th>4室2厅</th>\n      <td>208.867312</td>\n      <td>3613</td>\n    </tr>\n    <tr>\n      <th>3室1厅</th>\n      <td>91.561336</td>\n      <td>3451</td>\n    </tr>\n    <tr>\n      <th>1室0厅</th>\n      <td>36.206541</td>\n      <td>1914</td>\n    </tr>\n    <tr>\n      <th>2室0厅</th>\n      <td>49.615223</td>\n      <td>1369</td>\n    </tr>\n    <tr>\n      <th>1室2厅</th>\n      <td>67.410990</td>\n      <td>1192</td>\n    </tr>\n    <tr>\n      <th>5室2厅</th>\n      <td>286.614175</td>\n      <td>855</td>\n    </tr>\n    <tr>\n      <th>5室3厅</th>\n      <td>360.780565</td>\n      <td>584</td>\n    </tr>\n    <tr>\n      <th>4室3厅</th>\n      <td>272.177951</td>\n      <td>566</td>\n    </tr>\n    <tr>\n      <th>6室3厅</th>\n      <td>395.391215</td>\n      <td>181</td>\n    </tr>\n    <tr>\n      <th>6室2厅</th>\n      <td>289.408435</td>\n      <td>147</td>\n    </tr>\n    <tr>\n      <th>4室1厅</th>\n      <td>154.547586</td>\n      <td>145</td>\n    </tr>\n    <tr>\n      <th>3室3厅</th>\n      <td>175.504030</td>\n      <td>134</td>\n    </tr>\n    <tr>\n      <th>3室0厅</th>\n      <td>68.571869</td>\n      <td>107</td>\n    </tr>\n    <tr>\n      <th>5室4厅</th>\n      <td>403.056667</td>\n      <td>51</td>\n    </tr>\n    <tr>\n      <th>6室4厅</th>\n      <td>516.186122</td>\n      <td>49</td>\n    </tr>\n    <tr>\n      <th>7室3厅</th>\n      <td>461.077222</td>\n      <td>36</td>\n    </tr>\n    <tr>\n      <th>5室1厅</th>\n      <td>225.869375</td>\n      <td>32</td>\n    </tr>\n    <tr>\n      <th>7室2厅</th>\n      <td>310.042667</td>\n      <td>30</td>\n    </tr>\n    <tr>\n      <th>4室4厅</th>\n      <td>265.587143</td>\n      <td>21</td>\n    </tr>\n    <tr>\n      <th>6室1厅</th>\n      <td>170.230909</td>\n      <td>11</td>\n    </tr>\n    <tr>\n      <th>7室4厅</th>\n      <td>535.334444</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>8室2厅</th>\n      <td>366.875556</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>2室3厅</th>\n      <td>118.597500</td>\n      <td>8</td>\n    </tr>\n    <tr>\n      <th>6室5厅</th>\n      <td>669.482857</td>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>8室3厅</th>\n      <td>474.710000</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>7室5厅</th>\n      <td>532.396000</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>8室4厅</th>\n      <td>622.384000</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>9室2厅</th>\n      <td>757.650000</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>0室0厅</th>\n      <td>237.622500</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>9室4厅</th>\n      <td>409.463333</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>4室0厅</th>\n      <td>192.590000</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>5室6厅</th>\n      <td>344.720000</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>6室0厅</th>\n      <td>504.425000</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>5室5厅</th>\n      <td>334.585000</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>9室6厅</th>\n      <td>473.775000</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>7室1厅</th>\n      <td>177.200000</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>7室6厅</th>\n      <td>406.005000</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>2室4厅</th>\n      <td>129.400000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>9室3厅</th>\n      <td>593.010000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>10室2厅</th>\n      <td>370.000000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>8室5厅</th>\n      <td>1663.100000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>10室3厅</th>\n      <td>598.320000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>10室6厅</th>\n      <td>473.610000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3室4厅</th>\n      <td>94.370000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>8室1厅</th>\n      <td>386.170000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3室5厅</th>\n      <td>376.210000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4室5厅</th>\n      <td>321.000000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>7室0厅</th>\n      <td>161.440000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>6室6厅</th>\n      <td>1440.160000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>5室0厅</th>\n      <td>89.600000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>10室1厅</th>\n      <td>544.990000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>9室9厅</th>\n      <td>1352.240000</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 105
    }
   ],
   "source": [
    "#统计出不同户型对应的房源数量、平均面积。按不同户型的房源数量，从大到小进行排序，判断出主流户型是什么？平均面积多大？\n",
    "zhuliu_huxing=df['size'].groupby(df['huxing']).agg(['mean','count']).sort_values('count',ascending=False)\n",
    "zhuliu_huxing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "xiaoqu_name\n上海康城         216\n同润菲诗艾伦       117\n中远两湾城        114\n美岸栖庭(公寓)     106\n金地艺境(宝山)     106\n            ... \n协虹苑            1\n协诚中心大厦         1\n柳江街21          1\n南三小区           1\n里仁新村           1\nName: title, Length: 8333, dtype: int64"
     },
     "metadata": {},
     "execution_count": 106
    }
   ],
   "source": [
    "#每个小区的房源数量，并按从大到小的顺序排序\n",
    "xiaoqu_fangyuan=df['title'].groupby(df['xiaoqu_name']).count().sort_values(ascending=False)\n",
    "xiaoqu_fangyuan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "xiaoqu_name\n常熟路182号      527406.000000\n良友别墅         393443.000000\n霞飞别墅         369767.000000\n复兴西路299弄     356385.666667\n兴国路322号      318560.500000\n                 ...      \n御景龙庭玲珑公馆      13226.000000\n临潮二村          13093.000000\n华鼎大楼          13055.000000\n万寿新村(金山)      12680.000000\n石化一村          12144.000000\nName: up_price, Length: 8333, dtype: float64"
     },
     "metadata": {},
     "execution_count": 107
    }
   ],
   "source": [
    "#统计出上海每个小区每平方米的均价\n",
    "xiaoqu_junjia=df['up_price'].groupby(df['xiaoqu_name']).mean().sort_values(ascending=False)\n",
    "xiaoqu_junjia"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "                              title    price\n46396   花园18亩 开门见3座山 背面靠山 南东2面临天然大河  35000.0\n46451     仟万精装、华庭岛屿大独栋！5亩占地、自住！价格可谈  30000.0\n18891  罕见！占地10亩庄园，整个园区的King！让上海都在仰望  30000.0\n28332    安福路/常熟路 沿街独幢花园洋房，英式风格 地价已补  24999.0\n18875    （岛宅主推）占地8亩上产证 两面邻水靠大河 景观位置  21000.0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>title</th>\n      <th>price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>46396</th>\n      <td>花园18亩 开门见3座山 背面靠山 南东2面临天然大河</td>\n      <td>35000.0</td>\n    </tr>\n    <tr>\n      <th>46451</th>\n      <td>仟万精装、华庭岛屿大独栋！5亩占地、自住！价格可谈</td>\n      <td>30000.0</td>\n    </tr>\n    <tr>\n      <th>18891</th>\n      <td>罕见！占地10亩庄园，整个园区的King！让上海都在仰望</td>\n      <td>30000.0</td>\n    </tr>\n    <tr>\n      <th>28332</th>\n      <td>安福路/常熟路 沿街独幢花园洋房，英式风格 地价已补</td>\n      <td>24999.0</td>\n    </tr>\n    <tr>\n      <th>18875</th>\n      <td>（岛宅主推）占地8亩上产证 两面邻水靠大河 景观位置</td>\n      <td>21000.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 108
    }
   ],
   "source": [
    "#统计出上海二手房总价排在前 5 位的房源，看看超级豪宅有多壕？\n",
    "haozhai = df.sort_values('price',ascending=False).loc[:,['title','price']]\n",
    "haozhai.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "title\n花园18亩 开门见3座山 背面靠山 南东2面临天然大河     35000.0\n罕见！占地10亩庄园，整个园区的King！让上海都在仰望    30000.0\n仟万精装、华庭岛屿大独栋！5亩占地、自住！价格可谈       30000.0\n安福路/常熟路 沿街独幢花园洋房，英式风格 地价已补      24999.0\n（岛宅主推）占地8亩上产证 两面邻水靠大河 景观位置      21000.0\nName: price, dtype: float64"
     },
     "metadata": {},
     "execution_count": 114
    }
   ],
   "source": [
    "df.groupby('title')['price'].mean().sort_values(ascending=False).head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "buildyear\n1900-12-31        1\n1910-12-31        1\n1920-12-31        7\n1930-12-31       36\n1940-12-31      112\n1950-12-31       40\n1960-12-31      425\n1970-12-31      234\n1980-12-31     1282\n1990-12-31     8536\n2000-12-31    23747\n2010-12-31    25136\n2020-12-31    10097\nFreq: 10A-DEC, Name: title, dtype: int64"
     },
     "metadata": {},
     "execution_count": 145
    }
   ],
   "source": [
    "#计算出不同年代的房子数量（10年一个区间）\n",
    "pd.to_datetime(df['buildyear'],format=\"%Y\")\n",
    "df.set_index('buildyear')\n",
    "\n",
    "df2=df['title'].groupby(df['buildyear']).count()\n",
    "\n",
    "df2.resample('10A').sum()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}